This document performs an initial exploration of the FAA flight delay data, starting with the 2015 Airline Service Quality Performance (ASQP) data.

# Report the dimensions of d_15 (number of rows, then number of columns).
dim(d_15)
## [1] 971365     55
# Print a per-column summary of d_15: quantiles for numeric columns and
# level counts for factor columns.
summary(d_15)
##      ï..ID              YEAR         QUARTER          MONTH       
##  Min.   :      1   Min.   :2015   Min.   :1.000   Min.   : 1.000  
##  1st Qu.: 242842   1st Qu.:2015   1st Qu.:1.000   1st Qu.: 1.000  
##  Median : 485683   Median :2015   Median :3.000   Median : 8.000  
##  Mean   : 612321   Mean   :2015   Mean   :2.516   Mean   : 5.581  
##  3rd Qu.: 728524   3rd Qu.:2015   3rd Qu.:4.000   3rd Qu.:10.000  
##  Max.   :4877622   Max.   :2015   Max.   :4.000   Max.   :10.000  
##                                                                   
##   DAY_OF_MONTH    DAY_OF_WEEK                 FLIGHT_DATE    
##  Min.   : 1.00   Min.   :1.000   2015-01-02 00:00:00: 16741  
##  1st Qu.: 8.00   1st Qu.:2.000   2015-01-05 00:00:00: 16548  
##  Median :16.00   Median :4.000   2015-01-04 00:00:00: 16352  
##  Mean   :15.85   Mean   :4.023   2015-10-15 00:00:00: 16190  
##  3rd Qu.:23.00   3rd Qu.:6.000   2015-10-08 00:00:00: 16184  
##  Max.   :31.00   Max.   :7.000   2015-10-12 00:00:00: 16128  
##                                  (Other)            :873222  
##  UNIQUE_CARRIER     AIRLINE_ID       CARRIER          TAIL_NUM     
##  WN     :204558   Min.   :19393   WN     :204558          :  3126  
##  AA     :152840   1st Qu.:19790   AA     :152840   N484HA :   544  
##  DL     :138261   Median :19805   DL     :138261   N492HA :   544  
##  OO     : 96922   Mean   :19945   OO     : 96922   N491HA :   528  
##  EV     : 92022   3rd Qu.:20355   EV     : 92022   N477HA :   527  
##  UA     : 84289   Max.   :21171   UA     : 84289   N488HA :   526  
##  (Other):202473                   (Other):202473   (Other):965570  
##    FLIGHT_NUM       ORIGIN                    ORIGIN_CITY_NAME 
##  Min.   :   1   ATL    : 61464   Chicago, IL          : 66445  
##  1st Qu.: 741   DFW    : 52294   Atlanta, GA          : 61464  
##  Median :1675   ORD    : 52085   Dallas/Fort Worth, TX: 52294  
##  Mean   :2157   LAX    : 36289   New York, NY         : 36573  
##  3rd Qu.:3185   DEN    : 33448   Los Angeles, CA      : 36289  
##  Max.   :9793   SFO    : 27508   Houston, TX          : 35949  
##                 (Other):708277   (Other)              :682351  
##   ORIGIN_STATE    ORIGIN_STATE_FIPS  ORIGIN_STATE_NAME    ORIGIN_WAC  
##  TX     :124213   Min.   : 1.00     Texas     :124213   Min.   : 1.0  
##  CA     :120088   1st Qu.:12.00     California:120088   1st Qu.:34.0  
##  FL     : 76370   Median :25.00     Florida   : 76370   Median :53.0  
##  IL     : 69191   Mean   :26.36     Illinois  : 69191   Mean   :55.9  
##  GA     : 63849   3rd Qu.:42.00     Georgia   : 63849   3rd Qu.:82.0  
##  NY     : 45907   Max.   :78.00     New York  : 45907   Max.   :93.0  
##  (Other):471747                     (Other)   :471747                 
##       DEST                      DEST_CITY_NAME     DEST_STATE    
##  ATL    : 61377   Chicago, IL          : 67282   TX     :124531  
##  ORD    : 52927   Atlanta, GA          : 61377   CA     :120155  
##  DFW    : 52746   Dallas/Fort Worth, TX: 52746   FL     : 76344  
##  LAX    : 36437   Los Angeles, CA      : 36437   IL     : 69969  
##  DEN    : 33412   New York, NY         : 36377   GA     : 63733  
##  SFO    : 27540   Houston, TX          : 35827   NY     : 45755  
##  (Other):706926   (Other)              :681319   (Other):470878  
##  DEST_STATE_FIPS   DEST_STATE_NAME      DEST_WAC     CRS_DEP_TIME_HR
##  Min.   : 1.00   Texas     :124531   Min.   : 1.00   Min.   : 0.00  
##  1st Qu.:12.00   California:120155   1st Qu.:34.00   1st Qu.: 9.00  
##  Median :25.00   Florida   : 76344   Median :53.00   Median :13.00  
##  Mean   :26.35   Illinois  : 69969   Mean   :55.91   Mean   :12.95  
##  3rd Qu.:42.00   Georgia   : 63733   3rd Qu.:82.00   3rd Qu.:17.00  
##  Max.   :78.00   New York  : 45755   Max.   :93.00   Max.   :23.00  
##                  (Other)   :470878                                  
##  CRS_DEP_TIME_MIN  DEP_TIME_HR      DEP_TIME_MIN      DEP_DELAY     
##  Min.   : 0.00    17     : 63211   55     : 22579   -3     : 79443  
##  1st Qu.:10.00    8      : 61328   57     : 20944   -4     : 78215  
##  Median :25.00    10     : 60503   56     : 20938   -5     : 77817  
##  Mean   :26.58    11     : 60333   58     : 20577   -2     : 74719  
##  3rd Qu.:43.00    6      : 60177   54     : 20051   -1     : 65517  
##  Max.   :59.00    13     : 59982   0      : 20038   -6     : 57721  
##                   (Other):605831   (Other):846238   (Other):537933  
##  DEP_DELAY_MINS   DEP_DELAY_15  DEP_DELAY_GRPS      DEP_TIME_BLK   
##  0      :628463   0   :801862   -1     :570024   0600-0659: 67449  
##  1      : 26965   1   :155288   0      :228268   1700-1759: 67008  
##  2      : 19906   NULL: 14215   1      : 61275   0700-0759: 66558  
##  3      : 17464                 2      : 29980   0800-0859: 66142  
##  4      : 15306                 3      : 18116   1300-1359: 62950  
##  NULL   : 14215                 NULL   : 14215   1100-1159: 62591  
##  (Other):249046                 (Other): 49487   (Other)  :578667  
##     TAXI_OUT        WHEELS_OFF       WHEELS_ON         TAXI_IN      
##  12     : 76997   NULL   : 14546   NULL   : 15089   4      :154723  
##  11     : 76263   610    :  1543   1634   :  1141   5      :153709  
##  13     : 74087   608    :  1508   1853   :  1140   6      :123201  
##  10     : 70594   611    :  1494   1641   :  1136   7      : 92360  
##  14     : 67444   609    :  1460   1645   :  1136   3      : 88147  
##  15     : 60759   612    :  1420   1628   :  1134   8      : 69870  
##  (Other):545221   (Other):949394   (Other):950589   (Other):289355  
##  CRS_ARR_TIME_HR CRS_ARR_TIME_MIN  ARR_TIME_HR      ARR_TIME_MIN   
##  Min.   : 0.00   Min.   : 0.00    16     : 63796   40     : 16288  
##  1st Qu.:11.00   1st Qu.:14.00    18     : 61270   55     : 16278  
##  Median :15.00   Median :29.00    14     : 60204   54     : 16238  
##  Mean   :14.72   Mean   :28.74    20     : 59785   50     : 16190  
##  3rd Qu.:19.00   3rd Qu.:45.00    19     : 59020   45     : 16173  
##  Max.   :23.00   Max.   :59.00    17     : 58970   51     : 16165  
##                                   (Other):608320   (Other):874033  
##    ARR_DELAY      ARR_DELAY_MINS   ARR_DELAY_15  ARR_DELAY_GRPS  
##  -8     : 30162   0      :626003   0   :795462   -1     :409469  
##  -9     : 30161   1      : 19287   1   :159320   -2     :195448  
##  -10    : 30146   2      : 17748   NULL: 16583   0      :190545  
##  -7     : 29290   NULL   : 16583                 1      : 65172  
##  -11    : 29246   3      : 16279                 2      : 30487  
##  -6     : 28845   4      : 15307                 3      : 17724  
##  (Other):793515   (Other):260158                 (Other): 62520  
##     ARR_TIME_BLK      CANCELLED       CANCELLATION_CODE    DIVERTED       
##  1600-1659: 68376   Min.   :0.00000    :956692          Min.   :0.000000  
##  1800-1859: 63125   1st Qu.:0.00000   A:  3962          1st Qu.:0.000000  
##  1400-1459: 62786   Median :0.00000   B:  8067          Median :0.000000  
##  1000-1059: 60465   Mean   :0.01511   C:  2643          Mean   :0.001966  
##  2000-2059: 60267   3rd Qu.:0.00000   D:     1          3rd Qu.:0.000000  
##  1200-1259: 60166   Max.   :1.00000                     Max.   :1.000000  
##  (Other)  :596180                                                         
##  CRS_ELAPSED_TIME ACTUAL_ELAPSED_TIME    AIR_TIME         FLIGHTS 
##  Min.   : 22.0    NULL   : 16583      NULL   : 16583   Min.   :1  
##  1st Qu.: 85.0    79     :  7909      43     :  8311   1st Qu.:1  
##  Median :123.0    81     :  7861      59     :  8211   Median :1  
##  Mean   :141.8    80     :  7844      62     :  8211   Mean   :1  
##  3rd Qu.:175.0    76     :  7741      55     :  8177   3rd Qu.:1  
##  Max.   :718.0    77     :  7699      57     :  8153   Max.   :1  
##                   (Other):915728      (Other):913719              
##     DISTANCE       DISTANCE_GRP   
##  Min.   :  31.0   Min.   : 1.000  
##  1st Qu.: 370.0   1st Qu.: 2.000  
##  Median : 647.0   Median : 3.000  
##  Mean   : 819.6   Mean   : 3.749  
##  3rd Qu.:1066.0   3rd Qu.: 5.000  
##  Max.   :4983.0   Max.   :11.000  
## 
# Mean departure and arrival delay (in minutes) per carrier and month.
#
# summary(d_15) shows DEP_DELAY_MINS and ARR_DELAY_MINS as factors whose
# levels include the literal string "NULL". Calling as.numeric() directly on
# a factor returns its internal level codes rather than the recorded minutes,
# so each column is converted through as.character() first. "NULL" entries
# are mapped to NA explicitly and excluded from the means via na.rm = TRUE;
# otherwise any group containing a missing delay would average to NA.
delay_summary <- d_15 %>%
  group_by(CARRIER, MONTH) %>%
  summarize(
    n_flights = n(),
    mean_dep_delay = mean(
      as.numeric(na_if(as.character(DEP_DELAY_MINS), "NULL")),
      na.rm = TRUE
    ),
    mean_arr_delay = mean(
      as.numeric(na_if(as.character(ARR_DELAY_MINS), "NULL")),
      na.rm = TRUE
    )
  ) %>%
  # summarize() only peels off the last grouping level; drop the rest so the
  # result is a plain, ungrouped table.
  ungroup()

# One point per carrier-month, coloured by carrier.
p1 <- ggplot(delay_summary) +
  geom_point(aes(MONTH, mean_arr_delay, color = CARRIER)) +
  # facet_wrap(~CARRIER) +
  ggtitle("Mean arrival delay in minutes by carrier for 2015")

# Render the static ggplot as an interactive plotly widget.
ggplotly(p1)